# -*- coding:utf-8 -*-
import scrapy
import datetime
from scrapy.loader import ItemLoader
from test1.items import chinatax
import re

class chinataxspider(scrapy.Spider):
    """Spider that collects taxpayer credit evaluations from hd.chinatax.gov.cn.

    Crawl flow:

    1. ``start_requests`` submits the credit form for one seed region.
    2. ``parse`` reads the region links on that page and submits the form
       once per region (page 1).
    3. ``parse_info`` yields one ``chinatax`` item per data row and, when it
       is handling page 1 of a region, schedules that region's later pages.
    """

    name = 'chinatax'

    # Endpoint that every form request is submitted to.
    CREDIT_URL = "http://hd.chinatax.gov.cn/fagui/action/InitCredit.do#"
    # Fixed ``randCode`` form value sent with every request.
    RAND_CODE = '61m75t94'
    # Region code used for the seed request.
    SEED_TAX_CODE = '440300'

    def _credit_request(self, tax_code, page, callback, dont_filter=False):
        """Build the form request for one page of one region's listing.

        Args:
            tax_code: Region code placed in the ``taxCode`` form field.
            page: Page number placed in the ``cPage`` form field.
            callback: Method that will handle the response.
            dont_filter: Forwarded to Scrapy; ``True`` bypasses the
                duplicate-request filter.

        Returns:
            A ``scrapy.FormRequest`` targeting ``CREDIT_URL``.
        """
        return scrapy.FormRequest(
            self.CREDIT_URL,
            formdata={
                'taxCode': tax_code,
                'cPage': str(page),
                'randCode': self.RAND_CODE,
                'flag': '1',
            },
            callback=callback,
            dont_filter=dont_filter,
        )

    # Starting URL
    def start_requests(self):
        """Return the seed request (page 1 of the seed region)."""
        return [self._credit_request(self.SEED_TAX_CODE, 1, self.parse)]

    # Send one request per region
    def parse(self, response):
        """Yield a page-1 request for every region linked from the response."""
        # Collect the onclick handlers that carry each region's code.
        onclick_handlers = response.xpath("//td[@class='sv_hei']/a/@onclick").extract()
        for handler in onclick_handlers:
            # Drop the first 23 and last 2 characters of the handler text;
            # presumably the JavaScript call wrapper around the region code
            # -- verify against the live page if the markup changes.
            region_code = handler[23:-2]
            yield self._credit_request(
                region_code, 1, self.parse_info, dont_filter=True
            )

    def parse_info(self, response):
        """Yield items for one result page and, from page 1, the later pages.

        Yields:
            ``chinatax`` items, one per data row, followed (only when the
            response is page 1 of a region) by requests for further pages.
        """
        rows = response.xpath("//table[@class='sv_center'][1]/tr/td[1]/table/tr[1]/td/table[2]/tr/td[@class='sv_hei']/table[1]/tr")
        for row in rows:
            taxpayer_id = row.xpath('td[1]/text()').extract_first()
            # A row without text in its first cell yields None; testing
            # membership on None would raise TypeError and abort the whole
            # page, so such rows are skipped. Rows whose first cell contains
            # '纳' are excluded as before (presumably the header row).
            if taxpayer_id is None or '纳' in taxpayer_id:
                continue
            record = chinatax()
            record['taxpayer_identification_number'] = taxpayer_id
            record['taxpayer_s_name'] = row.xpath('td[2]/text()').extract_first()
            record['annual_evaluation'] = row.xpath('td[3]/text()').extract_first()
            record['insert_time'] = str(datetime.datetime.now())
            record['url'] = response.url
            yield record

        # Only the first page of a region fans out to the remaining pages,
        # otherwise every page would re-schedule all of its siblings.
        current_page = response.xpath("//form[@name='searchForm']/input[@id ='cPage']/@value").extract_first()
        if current_page != '1':
            return

        pager_onclick = response.xpath("//td[@class='sv_hei']/table[2]/tr/td[@class='sv_black12_24']/a[2]/@onclick").extract_first()
        if pager_onclick is None:
            # No second pagination link, e.g. a single-page result.
            return

        try:
            # Drop the first 21 and last 2 characters of the handler text to
            # leave the page number it carries.
            page_limit = int(pager_onclick[21:-2])
        except ValueError:
            self.logger.warning(
                "Unexpected pagination handler %r on %s",
                pager_onclick, response.url,
            )
            return

        # The region code is the same for every follow-up page, so read it once.
        tax_code = response.xpath("//form[@name='searchForm']/input[@id ='taxCode']/@value").extract_first()
        if tax_code is None:
            self.logger.warning(
                "No taxCode found in search form on %s", response.url
            )
            return

        # Send the requests for page 2 onwards.
        # NOTE(review): range() excludes page_limit itself, exactly as the
        # original code did. If the link holds the last page number, the
        # final page is never requested -- confirm against the live
        # pagination markup before changing the bound.
        for page in range(2, page_limit):
            yield self._credit_request(
                str(tax_code), page, self.parse_info, dont_filter=True
            )